---
title: "Throne Speech Fulfillment Code"
author: "John Kennedy"
date: "April 21, 2020"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
```

```{r, label = read, echo=FALSE, results='hide'}
library(MASS)
library(haven)
library(readxl)
library(freqdist)
library(car)
library(DAMisc)
library(ggplot2)
library(foreign)
library(plm)
library(broom)
library(mfx)
library(splines)
library(stargazer)
library(games)
library(gmodels)
library(foreign)
library(reshape2)
library(survival)
library(survminer)
library(effects)
library(lmtest)
library(car)
library(margins)
library(coefplot)
library(ggplot2)
library(ggeffects)
library(rio)
library(readxl)
library(readr)
library(glue)
library(stringr)
library(dplyr)
library(tidyr)


promise <- import("promise_data_0421.csv")
##Creating factors/Recoding
promise$achieved <- as.factor(promise$`Promise Achieved?`)
promise$change_type <- as.factor(promise$`Type of Change`)
# promise$maj <- as.factor(promise$Majority)
promise$firstmandate <- as.factor(promise$`First Mandate`)
promise$CPC <- as.factor(promise$CPC)
promise$subject <- as.factor(promise$`Subject of Promise`)
promise$party <- as.factor(promise$Party)
promise$surplus <- as.factor(promise$`Budget Surplus`)
promise$spellfac<- as.factor(promise$QTM2)
promise$EconPol<- as.factor(promise$EconPol)
promise$Recession<- as.factor(promise$Recession)
promise$Reintroduce<- as.factor(promise$Reintroduce)
#promise$budget <- promise$`Budget ($000,000)`
promise$maj = factor(
  promise$Majority, levels = c(0,1),
  labels = c("Minority", "Majority")
)


##Changing Reference Levels
promise$change_type <- relevel(promise$change_type, 
                               ref = "Status quo")
promise$subject <- relevel(promise$subject, ref = "Other")

promise$StatQuo <- car::recode(promise$`Type of Change`, 
  "6 = 'StatQuo'; 1:5 = 'Non-SQ'; else = NA", as.factor=TRUE,
  levels=c("Non-SQ", "StatQuo"))

###Fixing Promise Number

prom1 <- promise %>% filter(!is.na(promiseno))
maxpn <- max(prom1$promiseno)
prom2 <- promise %>% filter(is.na(promiseno))


unprom <- unique(glue("{prom2$`Text of Promise`}:{prom2$`Type of Change`}:{prom2$`Promised Action`}:{prom2$`Promised Type/Output`}"))
prom2$promiseno <- match(glue("{prom2$`Text of Promise`}:{prom2$`Type of Change`}:{prom2$`Promised Action`}:{prom2$`Promised Type/Output`}"), unprom)
prom2$promiseno <- prom2$promiseno + maxpn

promise <- rbind(prom1, prom2)
promise <- promise %>% 
  group_by(promiseno) %>% 
  mutate(counter = seq_along(qtr))
pm_dates <- import("pm_dates 0417[1].csv")
# pm_dates <- pm_dates_0417

pm_dates$date_speech <- as.Date(pm_dates$date_speech, "%m/%d/%Y")

pm_dates$end_election <- as.Date(pm_dates$end_election, "%m/%d/%Y")

pm_dates$end_electionq <- zoo::as.yearqtr(pm_dates$end_election)


promise2 <- promise %>% 
  group_by(promiseno) %>% 
  mutate(counter = seq_along(qtr)-1)
promise2$date_speech <- as.Date(promise2$date_speech, "%Y-%m-%d")
promise3 <- left_join(promise2, pm_dates)

promise3$qtr <- zoo::as.yearqtr(promise3$qtr)
promise3$counter_election <- with(promise3, ifelse(qtr <= end_electionq, counter, NA))

```

We could calculate the total number of fulfilled promises in the 1993-2011 period as follows: 

```{r}
## fulfilled promises
promise3 %>% 
  filter(!is.na(counter_election) & elec_year %in% 1993:2011) %>% 
  group_by(promiseno) %>% 
  summarise(fulfill = max(fulfill, na.rm=TRUE)) %>% 
  ungroup %>%
  dplyr::select(fulfill) %>% 
  pull %>% 
  mean
```

For the whole period, our promise fulfillment is as follows

```{r}
## fulfilled promises
promise3 %>% 
  filter(!is.na(counter_election)) %>% 
  group_by(promiseno) %>% 
  summarise(fulfill = max(fulfill, na.rm=TRUE)) %>% 
  ungroup %>%
  dplyr::select(fulfill) %>% 
  pull %>% 
  mean
```

Using Thomson et. al.'s (2017) data, we calculate their promise fulfillment rate (fully fulfilled) as: 

```{r}
thom <- import("thomson_rep/AJPS_CPPG_pledges_10April2017.dta")
thom %>% 
  filter(country == 11 & govparty == 1) %>%
  summarise(tfultot = mean(fulfil3==2, na.rm=TRUE)) %>% 
  pull
```

and the partially fulfilled rate as: 

```{r}
thom %>% 
  filter(country == 11 & govparty == 1) %>%
  summarise(tfultot = mean(fulfil3%in% c(1,2), na.rm=TRUE)) %>% 
  pull
```

To make Figure 1a, first we need to calculate the fulfillment rate by quarter since speech. 

```{r}
tab <- tabn <- with(subset(promise3, !is.na(counter_election)), 
            table(counter_election, fulfill))
tab <- prop.table(tab, 1)

plot.df1 <- data.frame(
  quarter = 0:19, 
  prop_fulfill = tab[,2], 
  n = rowSums(tabn))
```

Next, we need to estimate the parametric model without any of the controls or structural variables and add the resulting predictions into our dataset. 

```{r}
tmpdat <- promise3 %>% 
  dplyr::select(fulfill, counter_election) %>% 
  na.omit
LEmod <- glm(fulfill ~ poly(counter_election, 3), 
           family=binomial, 
           data=tmpdat)
plot.df1$phat <- predict(LEmod, newdata=data.frame(counter_election=0:19), type="response")
```

Finally, we can make the figure

```{r, fig.height=6, fig.width=8, out.width="50%", fig.align="center"}
EC <- ggplot(plot.df1, aes(x=quarter, y=prop_fulfill) ) + 
  geom_point() + 
  geom_segment(aes(xend=quarter, yend=0)) + 
  geom_text(aes(y=prop_fulfill+ .0075, label=n), size=3) + 
  theme_bw() + 
  labs(x="Quarters Since Speech", y="Proportion of Promises Fulfilled") +   
  geom_line(data=plot.df1, aes(y=phat), color="red") 
EC 
```

To make Figure 1b, we need to multiply the probabilities found above (from both the raw and the parametric model predictions) by the number of speeches remaining to be fulfilled at any point in time.  

```{r}
plot.df1$cumul_prop = cumsum(plot.df1$prop_fulfill*plot.df1$n)/659
plot.df1$cumul_prob = cumsum(plot.df1$n*plot.df1$phat)/659
```

To make the plotting easier, we reshape the data from wide to long format: 

```{r}
plot.df2 <- plot.df1 %>% 
  pivot_longer(cols=c(cumul_prop, cumul_prob), names_to="estimate", values_to="prob")
plot.df2$estimate <- as.factor(plot.df2$estimate)
levels(plot.df2$estimate) <- c("Predicted", "Raw")
```

Finally, we can make the graph: 

```{r, fig.height=6, fig.width=8, out.width="50%", fig.align="center"}
ggplot(plot.df2) + 
  geom_line(aes(x=quarter, y=prob, colour=estimate, linetype=estimate), size=1) + 
  labs(x="Quarters Since Speech", y="Cumulative Probability of Fulfillment")+ 
  scale_colour_manual(values=c("gray50", "black"), name="Model") + 
  scale_linetype_manual(values=c(1,2), name="Model") + 
  theme_bw() + 
  theme(legend.position="top")
```

Next, we can run the model. 

```{r}
fulmod <- glm(fulfill ~ maj + firstmandate + Recession + budget + 
                CPC + change_type + Reintroduce + 
                poly(counter_election, 3, raw=TRUE),  
            family = binomial (link = "logit"), 
            data = subset(promise3, !is.na(counter_election)))
summary(fulmod)
logitmfx(fulmod, data = subset(promise3, !is.na(counter_election)), atmean=FALSE)
exp(coef(fulmod))
binfit(fulmod)
```

To calculate the average marginal effects predictions, we can use the `probci` function from the `DAMisc` package.  There is an element in the output called `plot.data` that has data amenable for plotting.  We save those data as the object `pp`.  


```{r}
e <- probci(fulmod, 
            data=subset(promise3, !is.na(counter_election)), 
            changeX = c("maj", "counter_election"), 
            xvals = list(
              maj = factor(c(1,2), labels=c("Minority", "Majority")), 
              counter_election=0:19), 
            returnProbs=TRUE)

pp <- e$plot.data
pp <- pp %>% setNames(c("maj", "counter_election", "pred_prob", "lower", "upper"))
```

We use the data to make a Figure 2a. 


```{r, fig.height=6, fig.width=6, out.width="65%", fig.align="center"}
ggplot(pp, aes(x=counter_election, y=pred_prob, colour=maj, 
               fill=maj, linetype=maj)) + 
  geom_line(size=1) + 
  geom_ribbon(aes(ymin=lower, ymax=upper), alpha=.25, lty=0) + 
  scale_linetype_manual(values=c(2,1), name="Government Status") + 
  theme_bw() + 
  labs(x="Quarters Since Speech", y="Probability of Fulfillment", colour="Government Status", fill="Government Status") + 
  theme(legend.position="top")
```

To make figure 2b, is a bit more complicated.  First, we have to get the number of un-fulfilled promises by quarter, we store these in `my_n`.  We replicate each number twice because the plotting data are set up such that the `maj` varies within each `counter_election`.   

```{r}
my_n <- rep(plot.df1$n, each=2)
```

Next, we multiply the predicted probabilities from our `probci` output above by the number of un-fulfilled promises.  There is an element called `probs` in the output.  It is a list where each element corresponds to a single row in `plot.data`.  The probs element is an $n\times n_{sim}$, or $3008\times 2500$ in this case, matrix of predicted probabilities for the condition indicated in the row of `plot.data`.  That is, it holds `counter_election` and `maj` fixed at the values indicated in `plot.data` and all of the other values of the variables remain as they are in the original data. 

```{r}
nfp <- lapply(1:length(e$probs), function(i)(e$probs[[i]]*my_n[i]))
```

Next, we calculate the mean predicted number of statements fulfilled across all observations for each iteration of the simulation: 

```{r}
nfp.sum <- sapply(nfp, function(x)colMeans(x))
```

The next step is to get the cumulative sum of fulfilled statements for the majority governments and the minoritiy governments and divide by 659, the total number of promises, to turn into a proportion. 

```{r}
cpmaj <- apply(nfp.sum[, seq(2,40, by=2)], 1, cumsum)/659
cpmin <- apply(nfp.sum[, seq(1,40, by=2)], 1, cumsum)/659
```

As an intermediate step, we can take the majority-minority difference and the calculate the proportion of simulations where the majority is bigger than the minority for each condition.  The result below shows what proportion of the simulations finds the majority probability higher than the minority probability by quarter.  As you can see, this always happens.  This indicates that the effect is significant for all time-points. 

```{r}
diffs <- cpmaj-cpmin  
d <- apply(diffs, 1, function(x)mean(x > 0))
names(d) <- 0:19
d

```

Next, we can create the summary for majority and minority conditions - the predicted probability (the median of the simulated predicted values) and the upper and lower 95\% confidence intervals.  We can then re-sort the data so that we can add these summaries back into the original plotting dataset, `pp`. 

```{r}
cpmajsum <- apply(cpmaj, 1, quantile, c(.5, .025,.975))
cpminsum <- apply(cpmin, 1, quantile, c(.5, .025,.975))
pp <- pp[order(pp$maj, pp$counter_election), ]
cpall <- as.data.frame(rbind(t(cpminsum), t(cpmajsum)))
names(cpall) <- c("probc", "lowerc", "upperc")
pp <- cbind(pp, cpall)
```

Now, all that's left to do is make the graph. 

```{r, fig.height=6, fig.width=6, out.width="65%", fig.align="center"}
ggplot(pp, aes(x=counter_election, y=probc, colour=maj, linetype=maj, fill=maj)) + 
  geom_line(size=1) + 
  geom_ribbon(aes(ymin=lowerc, ymax=upperc), alpha=.25, lty=0) + 
  theme_bw() + 
  scale_linetype_manual(values=c(2,1), name="Government Status") + 
  labs(x="Quarters Since Speech", y="Probability of Fulfillment", colour="Government Status", linetype="Government Status", fill="Government Status") + 
  theme(legend.position="top")
```

We follow exactly the same steps to make the other two graphs. 

```{r, fig.height=6, fig.width=8, out.width="65%", fig.align="center"}
e <- probci(fulmod, 
            data=subset(promise3, !is.na(counter_election)), 
            changeX = c("firstmandate", "counter_election"), 
            xvals = list(
              firstmandate = factor(c(1,2), labels=c("0", "1")), 
              counter_election=0:19), 
            returnProbs=TRUE)

pp <- e$plot.data
pp <- pp %>% setNames(c("firstmandate", "counter_election", "pred_prob", 
                        "lower", "upper"))
levels(pp$firstmandate) <- c("No", "Yes")
```

Here is Figure 3a: 

```{r, fig.height=6, fig.width=6, out.width="65%", fig.align="center"}
ggplot(pp, aes(x=counter_election, y=pred_prob, colour=firstmandate, 
               fill=firstmandate, linetype=firstmandate)) + 
  geom_line(size=1) + 
  geom_ribbon(aes(ymin=lower, ymax=upper), alpha=.25, lty=0) + 
  theme_bw() + 
  scale_linetype_manual(values=c(2,1), name="First Mandate") + 
  labs(x="Quarters Since Speech", y="Probability of Fulfillment", colour="First Mandate", fill="First Mandate") + 
  theme(legend.position="top")
```

Here is the pre-processing for Figure 3b: 

```{r}

nfp <- lapply(1:length(e$probs), function(i)(e$probs[[i]]*my_n[i]))
nfp.sum <- sapply(nfp, function(x)colMeans(x))
cpmaj <- apply(nfp.sum[, seq(2,40, by=2)], 1, cumsum)/659
cpmin <- apply(nfp.sum[, seq(1,40, by=2)], 1, cumsum)/659
diffs <- cpmaj-cpmin  - 
apply(diffs, 1, function(x)mean(x > 0))
dim(cpmaj)
cpmajsum <- apply(cpmaj, 1, quantile, c(.5, .025,.975))
cpminsum <- apply(cpmin, 1, quantile, c(.5, .025,.975))
pp <- pp[order(pp$firstmandate, pp$counter_election), ]
cpall <- as.data.frame(rbind(t(cpminsum), t(cpmajsum)))
names(cpall) <- c("probc", "lowerc", "upperc")
pp <- cbind(pp, cpall)
```

Here is Figure 3b: 

```{r, fig.height=6, fig.width=6, out.width="65%", fig.align="center"}
ggplot(pp, aes(x=counter_election, y=probc, colour=firstmandate, linetype=firstmandate, fill=firstmandate)) + 
  geom_line(size=1) + 
  geom_ribbon(aes(ymin=lowerc, ymax=upperc), alpha=.25, lty=0) + 
  theme_bw() + 
  scale_linetype_manual(values=c(2,1), name="First Mandate") + 
  labs(x="Quarters Since Speech", y="Probability of Fulfillment", colour="First Mandate", linetype="First Mandate", fill="First Mandate") + 
  theme(legend.position="top")
```


Here is Figure 4a

```{r, fig.height=6, fig.width=8, out.width="65%", fig.align="center"}
e <- probci(fulmod, 
            data=subset(promise3, !is.na(counter_election)), 
            changeX = c("Recession", "counter_election"), 
            xvals = list(
              Recession = factor(c(1,2), labels=c("0", "1")), 
              counter_election=0:19), 
            returnProbs=TRUE)

pp <- e$plot.data
pp <- pp %>% setNames(c("Recession", "counter_election", "pred_prob", 
                        "lower", "upper"))
levels(pp$Recession) <- c("No", "Yes")
ggplot(pp, aes(x=counter_election, y=pred_prob, colour=Recession, 
               fill=Recession, linetype=Recession)) + 
  geom_line(size=1) + 
  geom_ribbon(aes(ymin=lower, ymax=upper), alpha=.25, lty=0) + 
  scale_linetype_manual(values=c(2,1), name="Recession") + 
  theme_bw() + 
  labs(x="Quarters Since Speech", y="Probability of Fulfillment", colour="Recession", fill="Recession") + 
  theme(legend.position="top")
```

Next, the post-processing for Figure 4b: 

```{r}
nfp <- lapply(1:length(e$probs), function(i)(e$probs[[i]]*my_n[i]))
nfp.sum <- sapply(nfp, function(x)colMeans(x))
cpmaj <- apply(nfp.sum[, seq(2,40, by=2)], 1, cumsum)/659
cpmin <- apply(nfp.sum[, seq(1,40, by=2)], 1, cumsum)/659
diffs <- cpmaj-cpmin  
apply(diffs, 1, function(x)mean(x > 0))
dim(cpmaj)
cpmajsum <- apply(cpmaj, 1, quantile, c(.5, .025,.975))
cpminsum <- apply(cpmin, 1, quantile, c(.5, .025,.975))
pp <- pp[order(pp$Recession, pp$counter_election), ]
cpall <- as.data.frame(rbind(t(cpminsum), t(cpmajsum)))
names(cpall) <- c("probc", "lowerc", "upperc")
pp <- cbind(pp, cpall)
```

And Figure 4b itself: 

```{r, fig.height=6, fig.width=6, out.width="65%", fig.align="center"}
ggplot(pp, aes(x=counter_election, y=probc, colour=Recession, linetype=Recession, fill=Recession)) + 
  geom_line(size=1) + 
  geom_ribbon(aes(ymin=lowerc, ymax=upperc), alpha=.25, lty=0) + 
  theme_bw() + 
  scale_linetype_manual(values=c(2,1), name="Recession") + 
  labs(x="Quarters Since Speech", y="Probability of Fulfillment", colour="Recession", linetype="Recession", fill="Recession") + 
  theme(legend.position="top")
```


